// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/cookies/CookieUtil.h"

#include <cstdio>
#include <cstdlib>

#include "third_party/WebKit/Source/platform/weborigin/KURL.h"
#include "third_party/WebKit/Source/wtf/text/WTFStringUtil.h"

namespace net {
namespace cookie_util {

// See make_dafsa.py for documentation of the generated dafsa byte array.

#include "EffectiveTldNamesInc.h"

// DAFSA byte array, and its size in bytes, that GetRegistryLengthImpl hands to
// LookupString. Both are initialized from kDafsa, which is presumably defined
// by the generated include above (see make_dafsa.py for the byte format).
// Neither variable is const, so they can be re-pointed at a different graph.
const unsigned char* g_graph = kDafsa;
size_t g_graph_length = sizeof(kDafsa);

// Sentinel returned by LookupString when the key is not present in the graph.
const int kNotFound = -1;
// Rule-type bit flags. GetRegistryLengthImpl tests the value returned by
// LookupString against these with bitwise AND, so one entry may carry several.
const int kExceptionRule = 1;  // Exception rule (the "!foo" form).
const int kWildcardRule = 2;   // Wildcard rule (the "*" form).
const int kPrivateRule = 4;    // Private registry; only honored when the caller
                               // passes INCLUDE_PRIVATE_REGISTRIES.

// Decodes the offset entry at |*pos| and adds it to |*offset|.
//
// The two bits masked by 0x60 in the first byte select the entry width:
//   0x60 -> three bytes (5 bits from the first byte, then 16 more bits)
//   0x40 -> two bytes   (5 bits from the first byte, then 8 more bits)
//   else -> one byte    (6 bits)
// Bit 0x80 of the first byte marks the last entry of the list; in that case
// |*pos| is set to |end| so that the following call reports exhaustion.
// Otherwise |*pos| is advanced past the entry that was just read.
//
// Returns false, touching nothing, when |*pos| already equals |end|; returns
// true after an offset has been consumed.
//
// No bounds check is made before the multi-byte reads (the upstream
// CHECK_LT(*pos + 2, end) is disabled), so the byte array must be well formed.
bool GetNextOffset(const unsigned char** pos, const unsigned char* end,
	const unsigned char** offset) {
	const unsigned char* const cursor = *pos;
	if (cursor == end)
		return false;

	const unsigned char head = cursor[0];
	const unsigned char width_bits = head & 0x60;
	int delta;
	size_t width;
	if (width_bits == 0x60) {
		// Three byte entry.
		delta = ((head & 0x1F) << 16) | (cursor[1] << 8) | cursor[2];
		width = 3;
	} else if (width_bits == 0x40) {
		// Two byte entry.
		delta = ((head & 0x1F) << 8) | cursor[1];
		width = 2;
	} else {
		// One byte entry.
		delta = head & 0x3F;
		width = 1;
	}
	*offset += delta;

	// The end-of-list flag is read through |*pos| again, after |*offset| has
	// been updated, exactly as the statement order has always been.
	if ((**pos & 0x80) != 0)
		*pos = end;
	else
		*pos += width;
	return true;
}

// Reports whether the byte at |offset| has its top bit (0x80) set, which is
// how the graph marks the final byte of a label. |end| is only used for the
// debug-time bounds assertion.
bool IsEOL(const unsigned char* offset, const unsigned char* end) {
	ASSERT(offset < end);
	const unsigned char kLabelEndBit = 0x80;
	return (*offset & kLabelEndBit) == kLabelEndBit;
}

// Compares the graph byte at |offset| with the first character of |key|.
// Intended for label bytes that are not the last one in their label (those
// are stored without the 0x80 marker). |end| is only used for the debug-time
// bounds assertion.
bool IsMatch(const unsigned char* offset, const unsigned char* end,
	const char* key) {
	ASSERT(offset < end);
	// Both sides are compared after the usual integer promotion.
	const int graph_char = *offset;
	const int key_char = *key;
	return graph_char == key_char;
}

// Compares the graph byte at |offset| with the first character of |key| for
// the case where the graph byte is the last one in its label: such bytes are
// stored with bit 0x80 set, so the key character is OR-ed with 0x80 before
// the comparison. |end| is only used for the debug-time bounds assertion.
bool IsEndCharMatch(const unsigned char* offset, const unsigned char* end,
	const char* key) {
	ASSERT(offset < end);
	const int expected = *key | 0x80;
	return *offset == expected;
}

// Tries to interpret the byte at |offset| as a return-value node.
// A return-value node has the bit pattern 100x xxxx (i.e. masking with 0xE0
// yields 0x80); its low four bits are stored in |*return_value|.
// Returns true when a value was stored, false (leaving |*return_value|
// untouched) when the byte is some other kind of node. |end| is only used for
// the debug-time bounds assertion.
bool GetReturnValue(const unsigned char* offset, const unsigned char* end,
	int* return_value) {
	ASSERT(offset < end);
	const unsigned char node = *offset;
	if ((node & 0xE0) != 0x80)
		return false;
	*return_value = node & 0x0F;
	return true;
}

// Lookup a domain key in a byte array generated by make_dafsa.py.
// The rule type is returned if key is found, otherwise kNotFound is returned.
//
// |graph| / |length| - start and size in bytes of the DAFSA byte array.
// |key| / |key_length| - the characters to look up; the key is delimited by
//     |key_length|, not by a terminating NUL.
//
// The value returned on success is whatever GetReturnValue() extracts from the
// matching return-value node (its low four bits).
int LookupString(const unsigned char* graph, size_t length, const char* key,
	size_t key_length) {
	// |pos| walks the list of offset entries for the current node, while
	// |offset| is the running sum of the decoded offsets (GetNextOffset adds
	// each entry to it), i.e. the address of the child being examined.
	const unsigned char* pos = graph;
	const unsigned char* end = graph + length;
	const unsigned char* offset = pos;
	const char* key_end = key + key_length;
	while (GetNextOffset(&pos, end, &offset)) {
		// Possible shapes of the child at |offset|:
		//   char <char>+ end_char offsets
		//   char <char>+ return value
		//   char end_char offsets
		//   char return value
		//   end_char offsets
		//   return_value
		// Set once at least one plain (non end-of-label) character of this
		// child has been matched against the key.
		bool did_consume = false;
		if (key != key_end && !IsEOL(offset, end)) {
			// Leading <char> is not a match. Don't dive into this child
			if (!IsMatch(offset, end, key))
				continue;
			did_consume = true;
			++offset;
			++key;
			// Possible matches at this point:
			// <char>+ end_char offsets
			// <char>+ return value
			// end_char offsets
			// return value
			// Remove all remaining <char> nodes possible
			while (!IsEOL(offset, end) && key != key_end) {
				if (!IsMatch(offset, end, key))
					return kNotFound;
				++key;
				++offset;
			}
		}
		// Possible matches at this point:
		// end_char offsets
		// return_value
		// If one or more <char> elements were consumed, a failure
		// to match is terminal. Otherwise, try the next node.
		if (key == key_end) {
			// The whole key has been consumed: it is present only if the
			// current byte is a return-value node.
			int return_value;
			if (GetReturnValue(offset, end, &return_value))
				return return_value;
			// The DAFSA guarantees that if the first char is a match, all
			// remaining char elements MUST match if the key is truly present.
			if (did_consume)
				return kNotFound;
			continue;
		}
		if (!IsEndCharMatch(offset, end, key)) {
			if (did_consume)
				return kNotFound;  // Unexpected
			continue;
		}
		// The label's final character matched. The bytes that follow it are
		// the offset list of the child node, so restart the entry walk there.
		++key;
		pos = ++offset;  // Dive into child
	}
	return kNotFound;  // No match
}

// Returns the length, in characters, of the registry (public suffix) portion
// at the end of |host|, or 0 when there is none.
//
// |host| must be non-empty (asserted). Leading dots are skipped and a single
// trailing dot is tolerated; that trailing dot is counted in the returned
// length. 0 is returned when the host consists only of dots, ends in more than
// one dot, has no interior dot, or is itself a registry.
//
// |unknown_filter| - when no rule matches, INCLUDE_UNKNOWN_REGISTRIES makes
//     the function return the length of the last host component; any other
//     value makes it return 0.
// |private_filter| - rules flagged kPrivateRule are only treated as matches
//     when this is INCLUDE_PRIVATE_REGISTRIES.
//
// Example (assuming "com" is a plain rule in the table - TODO confirm against
// the generated data): "www.example.com" yields 3.
size_t GetRegistryLengthImpl(const std::string& host, UnknownRegistryFilter unknown_filter, PrivateRegistryFilter private_filter) {
	ASSERT(!host.empty());

	// Skip leading dots.
	const size_t host_check_begin = host.find_first_not_of('.');
	if (host_check_begin == std::string::npos)
		return 0;  // Host is only dots.

	// A single trailing dot isn't relevant in this determination, but does need
	// to be included in the final returned length.
	size_t host_check_len = host.length();
	if (host[host_check_len - 1] == '.') {
		--host_check_len;
		ASSERT(host_check_len > 0);  // If this weren't true, the host would be ".",
									 // and we'd have already returned above.
		if (host[host_check_len - 1] == '.')
			return 0;  // Multiple trailing dots.
	}

	// Walk up the domain tree, most specific to least specific,
	// looking for matches at each level.
	// |curr_start| is the index of the first character of the candidate suffix
	// being looked up, |prev_start| the start of the candidate tried just
	// before it (npos on the first pass), and |next_dot| the next '.' at or
	// after |curr_start|.
	size_t prev_start = std::string::npos;
	size_t curr_start = host_check_begin;
	size_t next_dot = host.find('.', curr_start);
	if (next_dot >= host_check_len)  // Catches std::string::npos as well.
		return 0;  // This can't have a registry + domain.
	while (1) {
		// Look up the suffix [curr_start, host_check_len), i.e. without any
		// trailing dot.
		const char* domain_str = host.data() + curr_start;
		size_t domain_length = host_check_len - curr_start;
		int type = LookupString(g_graph, g_graph_length, domain_str, domain_length);
		bool do_check = (type != kNotFound) && (!(type & kPrivateRule) || private_filter == INCLUDE_PRIVATE_REGISTRIES);

		// If the apparent match is a private registry and we're not including
		// those, it can't be an actual match.
		if (do_check) {
			// Exception rules override wildcard rules when the domain is an exact
			// match, but wildcards take precedence when there's a subdomain.
			if (type & kWildcardRule && (prev_start != std::string::npos)) {
				// If prev_start == host_check_begin, then the host is the registry
				// itself, so return 0.
				return (prev_start == host_check_begin) ? 0 : (host.length() - prev_start);
			}

			if (type & kExceptionRule) {
				if (next_dot == std::string::npos) {
					// If we get here, we had an exception rule with no dots (e.g.
					// "!foo").  This would only be valid if we had a corresponding
					// wildcard rule, which would have to be "*".  But we explicitly
					// disallow that case, so this kind of rule is invalid.
					//NOTREACHED() << "Invalid exception rule";
					return 0;
				}
				// The registry is everything after the first label of the
				// matched exception rule.
				return host.length() - next_dot - 1;
			}

			// If curr_start == host_check_begin, then the host is the registry
			// itself, so return 0.
			return (curr_start == host_check_begin) ? 0 : (host.length() - curr_start);
		}

		if (next_dot >= host_check_len)  // Catches std::string::npos as well.
			break;

		// Drop the leading label and retry with the shorter suffix.
		prev_start = curr_start;
		curr_start = next_dot + 1;
		next_dot = host.find('.', curr_start);
	}

	// No rule found in the registry.  curr_start now points to the first
	// character of the last subcomponent of the host, so if we allow unknown
	// registries, return the length of this subcomponent.
	return unknown_filter == INCLUDE_UNKNOWN_REGISTRIES ? (host.length() - curr_start) : 0;
}

// Returns the registrable part of |host|: the registry (as measured by
// GetRegistryLengthImpl with unknown registries included) together with the
// single label that precedes it. An empty string is returned when |host| has
// no registry or no label in front of it. |host| must be non-empty (asserted).
std::string GetDomainAndRegistryImpl(
	const std::string& host, PrivateRegistryFilter private_filter) {
	ASSERT(!host.empty());

	const size_t host_length = host.length();
	const size_t registry_length = GetRegistryLengthImpl(host, INCLUDE_UNKNOWN_REGISTRIES, private_filter);
	const bool has_registry = (registry_length != std::string::npos) && (registry_length != 0);
	if (!has_registry)
		return std::string();

	// A usable host needs, in front of the registry, one dot plus at least one
	// character of label - hence the "2" below.
	ASSERT(host_length >= 2);
	if (registry_length > (host_length - 2)) {
		// Not expected to happen: there is no label before the registry.
		return std::string();
	}

	// Start just before the dot that precedes the registry and search backwards
	// for the previous dot. Everything after that dot is the answer; if there
	// is no such dot the entire host is.
	const size_t search_from = host_length - registry_length - 2;
	const size_t dot = host.rfind('.', search_from);
	return (dot == std::string::npos) ? host : host.substr(dot + 1);
}

// Public entry point: returns the domain-and-registry part of |host|, or an
// empty string for an empty |host|.
//
// NOTE: |host| is used exactly as given. The canonicalization step and the
// IP-address rejection that this function once performed
// (CanonicalizeHost / host_info.IsIPAddress()) are not done here.
std::string GetDomainAndRegistry(const std::string& host, PrivateRegistryFilter filter) {
	return host.empty() ? std::string() : GetDomainAndRegistryImpl(host, filter);
}

////

// Returns true when |domain_string| denotes a host-only cookie domain, i.e.
// it is empty or does not begin with a '.'.
bool DomainIsHostOnly(const std::string& domain_string) {
	const bool has_leading_dot = !domain_string.empty() && domain_string[0] == '.';
	return !has_leading_dot;
}

// Returns the domain used to decide whether two hosts belong together.
// For "http" and "https" this is the domain-and-registry of |host| (private
// registries included). For every other scheme it is |host| itself, minus a
// single leading '.' if one is present.
std::string GetEffectiveDomain(const std::string& scheme, const std::string& host) {
	const bool is_web_scheme = (scheme == "http") || (scheme == "https");
	if (is_web_scheme)
		return GetDomainAndRegistry(host, INCLUDE_PRIVATE_REGISTRIES);

	return DomainIsHostOnly(host) ? host : host.substr(1);
}

// Works out the domain a cookie set by |url| with Domain attribute
// |domain_string| should be stored under.
//
// |url|           - URL of the response that sets the cookie.
// |domain_string| - raw value of the cookie's Domain attribute; may be empty.
// |result|        - receives the cookie domain on success: the URL's host for
//                   a host-only cookie, otherwise the lower-cased Domain
//                   attribute with a leading '.'. Untouched on failure.
//
// Returns true when the cookie may be set, false when the Domain attribute
// does not cover the URL's host (different domain+registry, or the host is
// not the domain or one of its subdomains).
bool GetCookieDomainWithString(const blink::KURL& url, const std::string& domain_string, std::string* result) {
	const std::string url_host(WTF::WTFStringToStdString(url.host()));

	// If no domain was specified in the domain string, default to a host cookie.
	// (Treating "domain=<IP>" that equals the URL's IP host as a host cookie is
	// currently disabled; see the commented-out condition.)
	if (domain_string.empty() /*|| (url.HostIsIPAddress() && url_host == domain_string)*/) {
		*result = url_host;
		ASSERT(DomainIsHostOnly(*result));
		return true;
	}

	// Normalize the domain specified in the cookie line. Host names are
	// case-insensitive and RFC 6265 requires the Domain attribute to be
	// lower-cased before matching; without this, "Domain=Example.COM" would be
	// rejected and would never match the lower-case suffix table.
	// NOTE(review): this assumes url.host() is already lower-case (canonical) -
	// confirm for this KURL implementation.
	std::string cookie_domain = domain_string;
	for (size_t i = 0; i < cookie_domain.length(); ++i) {
		const char c = cookie_domain[i];
		if (c >= 'A' && c <= 'Z')
			cookie_domain[i] = static_cast<char>(c - 'A' + 'a');
	}
	// |cookie_domain| is known to be non-empty here because |domain_string| is.
	if (cookie_domain[0] != '.')
		cookie_domain = "." + cookie_domain;

	// Ensure |url| and |cookie_domain| have the same domain+registry.
	const std::string url_scheme(WTF::WTFStringToStdString(url.protocol()));
	const std::string url_domain_and_registry(GetEffectiveDomain(url_scheme, url_host));
	if (url_domain_and_registry.empty())
		return false;  // IP addresses/intranet hosts can't set domain cookies.
	const std::string cookie_domain_and_registry(GetEffectiveDomain(url_scheme, cookie_domain));
	if (url_domain_and_registry != cookie_domain_and_registry)
		return false;  // Can't set a cookie on a different domain + registry.

	// Ensure |url_host| is |cookie_domain| or one of its subdomains.  Given that
	// we know the domain+registry are the same from the above checks, this is
	// basically a simple string suffix check.
	const bool host_within_domain = (url_host.length() < cookie_domain.length()) ?
		// Only possible match: the domain is exactly "." + host.
		(cookie_domain == ("." + url_host)) :
		(url_host.compare(url_host.length() - cookie_domain.length(), cookie_domain.length(), cookie_domain) == 0);
	if (!host_within_domain)
		return false;

	*result = cookie_domain;
	return true;
}

// Parse a cookie expiration time.  We try to be lenient, but we need to
// assume some order to distinguish the fields.  The basic rules:
//  - The month name must be present and prefix the first 3 letters of the
//    full month name (jan for January, jun for June).
//  - If the year is <= 2 digits, it must occur after the day of month.
//  - The time must be of the format hh:mm:ss.
// An average cookie expiration will look something like this:
//   Sat, 15-Apr-17 21:01:22 GMT

// base::Time ParseCookieTime(const std::string& time_string) {
//   static const char* kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun",
//                                    "jul", "aug", "sep", "oct", "nov", "dec" };
//   static const int kMonthsLen = arraysize(kMonths);
//   // We want to be pretty liberal, and support most non-ascii and non-digit
//   // characters as a delimiter.  We can't treat : as a delimiter, because it
//   // is the delimiter for hh:mm:ss, and we want to keep this field together.
//   // We make sure to include - and +, since they could prefix numbers.
//   // If the cookie attribute came in in quotes (ex expires="XXX"), the quotes
//   // will be preserved, and we will get them here.  So we make sure to include
//   // quote characters, and also \ for anything that was internally escaped.
//   static const char* kDelimiters = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{|}~";
// 
//   base::Time::Exploded exploded = {0};
// 
//   base::StringTokenizer tokenizer(time_string, kDelimiters);
// 
//   bool found_day_of_month = false;
//   bool found_month = false;
//   bool found_time = false;
//   bool found_year = false;
// 
//   while (tokenizer.GetNext()) {
//     const std::string token = tokenizer.token();
//     DCHECK(!token.empty());
//     bool numerical = IsAsciiDigit(token[0]);
// 
//     // String field
//     if (!numerical) {
//       if (!found_month) {
//         for (int i = 0; i < kMonthsLen; ++i) {
//           // Match prefix, so we could match January, etc
//           if (base::strncasecmp(token.c_str(), kMonths[i], 3) == 0) {
//             exploded.month = i + 1;
//             found_month = true;
//             break;
//           }
//         }
//       } else {
//         // If we've gotten here, it means we've already found and parsed our
//         // month, and we have another string, which we would expect to be
//         // the time zone name.  According to the RFC and my experiments with
//         // how sites format their expirations, we don't have much of a reason
//         // to support timezones.  We don't want to ever barf on user input,
//         // but this DCHECK should pass for well-formed data.
//         // DCHECK(token == "GMT");
//       }
//     // Numeric field w/ a colon
//     } else if (token.find(':') != std::string::npos) {
//       if (!found_time &&
// #ifdef COMPILER_MSVC
//           sscanf_s(
// #else
//           sscanf(
// #endif
//                  token.c_str(), "%2u:%2u:%2u", &exploded.hour,
//                  &exploded.minute, &exploded.second) == 3) {
//         found_time = true;
//       } else {
//         // We should only ever encounter one time-like thing.  If we're here,
//         // it means we've found a second, which shouldn't happen.  We keep
//         // the first.  This check should be ok for well-formed input:
//         // NOTREACHED();
//       }
//     // Numeric field
//     } else {
//       // Overflow with atoi() is unspecified, so we enforce a max length.
//       if (!found_day_of_month && token.length() <= 2) {
//         exploded.day_of_month = atoi(token.c_str());
//         found_day_of_month = true;
//       } else if (!found_year && token.length() <= 5) {
//         exploded.year = atoi(token.c_str());
//         found_year = true;
//       } else {
//         // If we're here, it means we've either found an extra numeric field,
//         // or a numeric field which was too long.  For well-formed input, the
//         // following check would be reasonable:
//         // NOTREACHED();
//       }
//     }
//   }
// 
//   if (!found_day_of_month || !found_month || !found_time || !found_year) {
//     // We didn't find all of the fields we need.  For well-formed input, the
//     // following check would be reasonable:
//     // NOTREACHED() << "Cookie parse expiration failed: " << time_string;
//     return base::Time();
//   }
// 
//   // Normalize the year to expand abbreviated years to the full year.
//   if (exploded.year >= 69 && exploded.year <= 99)
//     exploded.year += 1900;
//   if (exploded.year >= 0 && exploded.year <= 68)
//     exploded.year += 2000;
// 
//   // If our values are within their correct ranges, we got our time.
//   if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 &&
//       exploded.month >= 1 && exploded.month <= 12 &&
//       exploded.year >= 1601 && exploded.year <= 30827 &&
//       exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) {
//     return base::Time::FromUTCExploded(exploded);
//   }
// 
//   // One of our values was out of expected range.  For well-formed input,
//   // the following check would be reasonable:
//   // NOTREACHED() << "Cookie exploded expiration failed: " << time_string;
// 
//   return base::Time();
// }

// Builds the URL "<scheme>://<host>" for a cookie domain, where the scheme is
// "https" when |is_https| is set and "http" otherwise, and the host is
// |domain| without its leading '.' (if it has one). An empty |domain| yields
// a default-constructed KURL.
blink::KURL CookieOriginToURL(const std::string& domain, bool is_https) {
	if (domain.empty())
		return blink::KURL();

	std::string url(is_https ? "https" : "http");
	url += "://";
	if (domain[0] == '.')
		url.append(domain, 1, std::string::npos);  // Skip the leading dot.
	else
		url += domain;
	return blink::KURL(blink::ParsedURLString, url.c_str());
}

}  // namespace cookie_util
}  // namespace net

